// Remember the current working directory; later file paths are built from it.
local dir `c(pwd)'

// Start from an empty dataset holding eight rows, numbered by id.
clear
set obs 8
generate id = _n

// Fill the description column: row number followed by a fixed label.
generate str description = ""
local row = 0
foreach label in "Original string" ///
	"String without spaces" ///
	"Space insertion: language specific word length" ///
	"Space insertion: language specific no." ///
	"Space insertion: no. according to p50" ///
	"Space insertion: no. according to p25 (longer words)" ///
	"Space insertion: no. according to p75 (shorter words)" ///
	"Method test: randomization/masking applied twice" {
	local ++row
	replace description = string(id) + " `label'" in `row'
}

// Empty placeholders: a text sample column and two numeric columns per book.
generate str text_example = ""
foreach bk in Acts John Luke Mark Matthew Revelation {
	generate `bk'_spearman = .
	generate `bk'_R = .
}
// Fill the results table row by row and export it.
// Each letter in the outer loop corresponds to one table row (counter i).
// For every row the code stores
//   - the first 100 Unicode characters of a sample text file, and
//   - per book, the Spearman correlation between D_order and D_structure
//     and the squared Pearson correlation between D_structure and 1/D_order,
//     both computed on language-level means and rounded to two decimals.
qui {
local i=1
foreach letter in a u b c d e f g {
// Text sample for this row, read from the folder belonging to this letter.
replace text_example=usubstr(fileread("`dir'\length_validation_II_`letter'\\eng-x-bible-kingjames_44_original.txt"),1,100) in `i'
foreach book in Acts John Luke Mark Matthew Revelation {
	noisily di "`book' `letter'"
	// Translate the book name into the numeric code used by the variable book.
	if "`book'"=="Acts" {
		local book_number=44
	}
	else if "`book'"=="John" {
		local book_number=43
	}
	else if "`book'"=="Luke" {
		local book_number=42
	}
	else if "`book'"=="Mark" {
		local book_number=41
	}
	else if "`book'"=="Matthew" {
		local book_number=40
	}
	else if "`book'"=="Revelation" {
		local book_number=66
	}
	// Work on the entropy data without losing the results table in memory.
	preserve
	use if book==`book_number' using entropy_bible_validation_II_`letter', clear  
	merge m:1 trans using data_bible_final, keep(3) 
	// One observation per language: means of all D* variables.
	collapse (mean) D*, by(language ISO) fast
	spearman D_order D_structure
	local rho: di %3.2f r(rho)
	gen reciprocal=1/D_order
	// Full variable name so the command does not depend on abbreviation settings.
	corr D_structure reciprocal
	// Squared Pearson correlation from the corr call above.
	local r2: di %3.2f (r(rho))^2
	restore
	replace `book'_spearman=`rho' in `i'
	replace `book'_R=(`r2') in `i'
	}
// Advance to the next table row.
local ++i
}
}
drop id
export excel using "Table_S1", firstrow(varlabels) replace
// exit ends the do-file here; anything after this line is skipped in a full run.
exit
// Word-length summary for three corpus files.
// For each file the code keeps the rows whose vid starts with 44 (Acts),
// splits every sentence into one observation per word, and displays the
// mean, standard deviation and number of word lengths in Unicode characters.
// Relies on the local macro dir being defined earlier in the same run.
qui {
foreach language in vie-x-bible-newworld deu-x-bible-schlachter esk-x-bible { 
use `"`dir'\corpus\\`language'"', clear
// NOTE(review): presumably the first eight rows are file header lines - confirm.
drop in 1/8
/*Acts*/
keep if real(usubstr(vid,1,2))==44
	gen sentencelength=wordcount(sentence)	
	// expand treats a count below 1 as 1, so a sentence without words would
	// survive as one empty word of length 0 and distort the statistics.
	drop if sentencelength==0
gen sentid=_n
	// One observation per word; bysort sorts by sentid itself.
 	expand sentencelength
	bysort sentid: gen wordid=_n
	gen word=word(sentence,wordid)
keep word
gen wordlength=ustrlen(word)
sum wordlength
local mean: di %3.2f r(mean)
local SD: di %3.2f r(sd)
local N=r(N)
noisily di "`language' | Mean: `mean' | SD: `SD' | N: `N'" 
}
}
exit